# Base registry entry for the self-prompting eval.
# `id` names the `self_prompting.full` entry defined in this file, and
# `metrics` lists the metric names associated with this eval.
self_prompting:
  id: self_prompting.full
  metrics: [accuracy, accuracy_fuzzy, n_samples]
  # Folded scalar (`>-`): the two lines below are joined with a single space
  # and no trailing newline, so the value is the same one-line string.
  description: >-
    Evaluate the ability of models to prompt other models to perform
    single-turn eval tasks.

# Full variant: three tasker models, 50 tasks, 10 samples per task, and a
# baseline log path.
self_prompting.full:
  # Implementation reference in `module.path:ClassName` form.
  class: evals.elsuite.self_prompting.eval:SelfPrompting
  args:
    samples_jsonl: self_prompting/samples.jsonl
    # NOTE(review): presumably the models that receive the generated
    # prompts; confirm against the SelfPrompting class.
    tasker_models:
      - 'gpt-3.5-turbo'
      - 'gpt-4-base'
      - 'gpt-4'
    n_tasks: 50
    n_samples_per_task: 10
    baseline_logpath: self_prompting/oriprompt.log

# Small variant: a single tasker model, 50 tasks, 1 sample per task.
# NOTE(review): unlike `self_prompting.full`, no `baseline_logpath` is set
# here; confirm the omission is intentional.
self_prompting.small:
  # Implementation reference in `module.path:ClassName` form.
  class: evals.elsuite.self_prompting.eval:SelfPrompting
  args:
    samples_jsonl: self_prompting/samples.jsonl
    tasker_models:
      - 'gpt-3.5-turbo'
    n_tasks: 50
    n_samples_per_task: 1
